
from pyspark import SparkConf, SparkContext
conf = SparkConf().setAppName("test_spark_app").setMaster("local")
# Entry point object for the Spark execution environment.
sc = SparkContext(conf=conf)

try:
    # Read the file with Spark, split every line on single spaces, and bring
    # all resulting tokens back as a list. Despite its name, ``textFile_rdd``
    # is a plain Python list (the result of ``collect()``), not an RDD.
    textFile_rdd = (
        sc.textFile('./BB-baseFile.txt')
        .flatMap(lambda x: x.split(' '))
        .collect()
    )
    print(textFile_rdd)

    # Total number of occurrences of '甲醛' across all tokens. Empty tokens
    # need no special handling because ``''.count(...)`` is already 0.
    count = sum(text.count('甲醛') for text in textFile_rdd)
    print(count)
finally:
    # Always release the SparkContext, even if reading the file fails.
    sc.stop()